DSNParser.pm: Work around MySQL not resolving encodings sanely.

For some reason, MySQL won't resolve iso-8859-1 or latin-1 as latin1,
and will die if you try to use them in, e.g., SET NAMES.
This commit makes DSNParser resolve those cases on its own
before calling SET NAMES.
This commit is contained in:
Brian Fraser
2012-07-23 01:11:49 -03:00
parent 02c6421a25
commit 61a65fcdbd
3 changed files with 52 additions and 26 deletions

View File

@@ -2121,6 +2121,12 @@ sub fill_in_dsn {
$dsn->{D} ||= $db;
}
my %encoding_aliases = (
'utf-8' => 'utf8',
'iso-8859-1' => 'latin1',
'latin-1' => 'latin1',
);
sub get_dbh {
my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
$opts ||= {};
@@ -2129,7 +2135,7 @@ sub get_dbh {
RaiseError => 1,
PrintError => 0,
ShowErrorStatement => 1,
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf-?8/i ? 1 : 0),
};
@{$defaults}{ keys %$opts } = values %$opts;
@@ -2182,7 +2188,7 @@ sub get_dbh {
PTDEBUG && _d($dbh, $sql);
my ($sql_mode) = eval { $dbh->selectrow_array($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error getting the current SQL_MODE: $EVAL_ERROR";
}
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
@@ -2192,15 +2198,18 @@ sub get_dbh {
PTDEBUG && _d($dbh, $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting SQL_QUOTE_SHOW_CREATE, SQL_MODE"
. ($sql_mode ? " and $sql_mode" : '')
. ": $EVAL_ERROR";
}
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
$sql = "/*!40101 SET NAMES $charset*/";
if ( my ($charset) = $cxn_string =~ m/charset=([-\w]+)/ ) {
$charset = $encoding_aliases{lc($charset)} || $charset;
$sql = qq{/*!40101 SET NAMES "$charset"*/};
PTDEBUG && _d($dbh, ':', $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting NAMES to $charset: $EVAL_ERROR";
}
PTDEBUG && _d('Enabling charset for STDOUT');
if ( $charset eq 'utf8' ) {
@@ -2212,12 +2221,12 @@ sub get_dbh {
}
}
if ( $self->prop('set-vars') ) {
$sql = "SET " . $self->prop('set-vars');
if ( my $var = $self->prop('set-vars') ) {
$sql = "SET $var";
PTDEBUG && _d($dbh, ':', $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting $var: $EVAL_ERROR";
}
}
}

View File

@@ -263,6 +263,18 @@ sub fill_in_dsn {
$dsn->{D} ||= $db;
}
# MySQL won't resolve iso-8859-1 or latin-1 as latin1, while Perl would, so
# we hardcode the aliases here. The UTF-8 case is a bit different;
# MySQL doesn't really support UTF-8 in SET NAMES, instead using
# their own definition, which is constrained to codepoints 0..0xFFFF, so
# rightfully calls it something different: utf8. I'm not actually sure
# if the naming convention is intended or plain lucky on their part, though.
my %encoding_aliases = (
'utf-8' => 'utf8',
'iso-8859-1' => 'latin1',
'latin-1' => 'latin1',
);
# Actually opens a connection, then sets some things on the connection so it is
# the way the Maatkit tools will expect. Tools should NEVER open their own
# connection or use $dbh->reconnect, or these things will not take place!
@@ -274,7 +286,7 @@ sub get_dbh {
RaiseError => 1,
PrintError => 0,
ShowErrorStatement => 1,
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf-?8/i ? 1 : 0),
};
@{$defaults}{ keys %$opts } = values %$opts;
@@ -336,7 +348,7 @@ sub get_dbh {
PTDEBUG && _d($dbh, $sql);
my ($sql_mode) = eval { $dbh->selectrow_array($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error getting the current SQL_MODE: $EVAL_ERROR";
}
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
@@ -346,16 +358,19 @@ sub get_dbh {
PTDEBUG && _d($dbh, $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting SQL_QUOTE_SHOW_CREATE, SQL_MODE"
. ($sql_mode ? " and $sql_mode" : '')
. ": $EVAL_ERROR";
}
# Set character set and binmode on STDOUT.
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
$sql = "/*!40101 SET NAMES $charset*/";
if ( my ($charset) = $cxn_string =~ m/charset=([-\w]+)/ ) {
$charset = $encoding_aliases{lc($charset)} || $charset;
$sql = qq{/*!40101 SET NAMES "$charset"*/};
PTDEBUG && _d($dbh, ':', $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting NAMES to $charset: $EVAL_ERROR";
}
PTDEBUG && _d('Enabling charset for STDOUT');
if ( $charset eq 'utf8' ) {
@@ -367,12 +382,12 @@ sub get_dbh {
}
}
if ( $self->prop('set-vars') ) {
$sql = "SET " . $self->prop('set-vars');
if ( my $var = $self->prop('set-vars') ) {
$sql = "SET $var";
PTDEBUG && _d($dbh, ':', $sql);
eval { $dbh->do($sql) };
if ( $EVAL_ERROR ) {
die $EVAL_ERROR;
die "Error setting $var: $EVAL_ERROR";
}
}
}

View File

@@ -23,7 +23,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 12;
plan tests => 13;
}
my $output;
@@ -80,25 +80,28 @@ sub test_charset {
$sb->load_file('master', 't/pt-archiver/samples/table1.sql');
local $@;
eval {
my ($out, $exit_val) = full_output( sub {
pt_archiver::main("-c", "b,c", qw(--where 1=1 --header),
"--source", "D=test,t=table_1,F=$cnf",
'--file', '/tmp/%Y-%m-%d-%D_%H:%i:%s.%t',
'--no-check-charset',
'--charset', $charset,
);
};
});
ok !$@, "--charset $charset works";
is($exit_val,
0,
"--charset $charset works"
) or diag($out);
}
for my $charset (qw(latin1 iso-8859-1 utf8 UTF-8 )) {
for my $charset (qw(latin1 iso-8859-1 latin-1 utf8 UTF-8 UTF8 )) {
test_charset($charset);
}
my $warning;
local $SIG{__WARN__} = sub { $warning .= shift };
my $out = output( sub {
my ($out) = full_output( sub {
$sb->load_file('master', 't/pt-archiver/samples/table1.sql');
pt_archiver::main("-c", "b,c", qw(--where 1=1 --header),
"--source", "D=test,t=table_1,F=$cnf",
@@ -109,8 +112,7 @@ my $out = output( sub {
},
);
like($out, qr/\QCannot open :encoding(some_chars/, "..but an unknown charset fails");
like($warning, qr/Cannot find encoding/, "..and throws a useful warning");
like($out, qr/\QError setting NAMES to some_charset_that_doesn/, "..but an unknown charset fails");
# #############################################################################
# Done.